@article{Sutton2005ReinforcementLA,
  title   = {Reinforcement Learning: An Introduction},
  author  = {Sutton, Richard S. and Barto, Andrew G.},
  journal = {IEEE Transactions on Neural Networks},
  year    = {2005},
  volume  = {16},
  pages   = {285--286},
  url     = {https://api.semanticscholar.org/CorpusID:9166388}
}

@misc{silver2015,
  author       = {Silver, David},
  title        = {Lectures on Reinforcement Learning},
  year         = {2015},
  howpublished = {\url{https://www.davidsilver.uk/teaching/}},
}

@misc{deep-rl-course,
  author       = {Simonini, Thomas and Sanseviero, Omar},
  title        = {The Hugging Face Deep Reinforcement Learning Class},
  year         = {2023},
  publisher    = {GitHub},
  howpublished = {\url{https://github.com/huggingface/deep-rl-class}},
}

@misc{stanford,
  author       = {Brunskill, Emma},
  title        = {{CS234}: Reinforcement Learning Winter 2025},
  year         = {2025},
  howpublished = {\url{https://web.stanford.edu/class/cs234/}},
}

@misc{spinningup,
  author       = {Achiam, Josh},
  title        = {Spinning Up in Deep Reinforcement Learning},
  year         = {2018},
  howpublished = {\url{https://spinningup.openai.com/}},
}

@inproceedings{Kluyver2016jupyter,
  title        = {{Jupyter} Notebooks -- a publishing format for reproducible computational workflows},
  author       = {Kluyver, Thomas and Ragan-Kelley, Benjamin and P{\'e}rez, Fernando and others},
  booktitle    = {Positioning and Power in Academic Publishing: Players, Agents and Agendas},
  editor       = {Loizides, F. and Schmidt, B.},
  organization = {IOS Press},
  pages        = {87--90},
  year         = {2016}
}

@software{reback2020pandas,
  author    = {{The pandas development team}},
  title     = {pandas-dev/pandas: Pandas},
  month     = feb,
  year      = {2020},
  publisher = {Zenodo},
  version   = {latest},
  doi       = {10.5281/zenodo.3509134},
  url       = {https://doi.org/10.5281/zenodo.3509134}
}

@inproceedings{mckinney-proc-scipy-2010,
  author    = {McKinney, Wes},
  title     = {Data Structures for Statistical Computing in {Python}},
  booktitle = {Proceedings of the 9th {Python} in Science Conference},
  pages     = {56--61},
  year      = {2010},
  editor    = {van der Walt, St{\'e}fan and Millman, Jarrod},
  doi       = {10.25080/Majora-92bf1922-00a}
}

@misc{towers_gymnasium_2023,
  title     = {Gymnasium},
  author    = {Towers, Mark and Terry, Jordan K. and Kwiatkowski, Ariel and others},
  year      = {2023},
  month     = mar,
  publisher = {Zenodo},
  doi       = {10.5281/zenodo.8127026},
  url       = {https://zenodo.org/record/8127025},
  urldate   = {2023-07-08},
  abstract  = {An API standard for single-agent reinforcement learning environments, with popular reference environments and related utilities (formerly Gym)},
}

@misc{tensorflow2015-whitepaper,
  title  = {{TensorFlow}: Large-Scale Machine Learning on Heterogeneous Systems},
  author = {Abadi, Mart{\'\i}n and Agarwal, Ashish and Barham, Paul and others},
  year   = {2015},
  url    = {https://www.tensorflow.org/},
  note   = {Software available from tensorflow.org},
}

@incollection{NEURIPS2019_9015,
  title     = {{PyTorch}: An Imperative Style, High-Performance Deep Learning Library},
  author    = {Paszke, Adam and Gross, Sam and Massa, Francisco and others},
  booktitle = {Advances in Neural Information Processing Systems 32},
  pages     = {8024--8035},
  year      = {2019},
  publisher = {Curran Associates, Inc.},
  url       = {http://papers.neurips.cc/paper/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf}
}

@misc{gym,
  author        = {Brockman, Greg and Cheung, Vicki and Pettersson, Ludwig and others},
  title         = {{OpenAI} Gym},
  year          = {2016},
  eprint        = {1606.01540},
  archiveprefix = {arXiv},
}

@article{stable-baselines3,
  author  = {Raffin, Antonin and Hill, Ashley and Gleave, Adam and others},
  title   = {Stable-Baselines3: Reliable Reinforcement Learning Implementations},
  journal = {Journal of Machine Learning Research},
  year    = {2021},
  volume  = {22},
  number  = {268},
  pages   = {1--8},
  url     = {http://jmlr.org/papers/v22/20-1364.html}
}

@article{SilverHuangEtAl16nature,
  author   = {Silver, David and Huang, Aja and Maddison, Chris J. and others},
  title    = {Mastering the Game of {Go} with Deep Neural Networks and Tree Search},
  journal  = {Nature},
  year     = {2016},
  volume   = {529},
  number   = {7587},
  pages    = {484--489},
  month    = jan,
  issn     = {0028-0836},
  doi      = {10.1038/nature16961},
  abstract = {The game of Go has long been viewed as the most challenging of classic games for artificial intelligence owing to its enormous search space and the difficulty of evaluating board positions and moves. Here we introduce a new approach to computer Go that uses 'value networks' to evaluate board positions and 'policy networks' to select moves. These deep neural networks are trained by a novel combination of supervised learning from human expert games, and reinforcement learning from games of self-play. Without any lookahead search, the neural networks play Go at the level of state-of-the-art Monte Carlo tree search programs that simulate thousands of random games of self-play. We also introduce a new search algorithm that combines Monte Carlo simulation with value and policy networks. Using this search algorithm, our program AlphaGo achieved a 99.8 percent winning rate against other Go programs, and defeated the human European Go champion by 5 games to 0. This is the first time that a computer program has defeated a human professional player in the full-sized game of Go, a feat previously thought to be at least a decade away.},
}



@article{chess,
  author     = {Silver, David and Hubert, Thomas and Schrittwieser, Julian and others},
  title      = {Mastering Chess and Shogi by Self-Play with a General Reinforcement Learning Algorithm},
  journal    = {CoRR},
  volume     = {abs/1712.01815},
  year       = {2017},
  url        = {http://arxiv.org/abs/1712.01815},
  eprinttype = {arXiv},
  eprint     = {1712.01815},
  timestamp  = {Mon, 13 Aug 2018 16:46:01 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1712-01815.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

@article{atari-first,
  author     = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and others},
  title      = {Playing Atari with Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1312.5602},
  year       = {2013},
  url        = {http://arxiv.org/abs/1312.5602},
  eprinttype = {arXiv},
  eprint     = {1312.5602},
  timestamp  = {Mon, 13 Aug 2018 16:47:42 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/MnihKSGAWR13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

@article{starcraft2,
  author    = {Vinyals, Oriol and Babuschkin, Igor and Czarnecki, Wojciech M. and others},
  title     = {Grandmaster level in {StarCraft} {II} using multi-agent reinforcement learning},
  journal   = {Nature},
  volume    = {575},
  number    = {7782},
  pages     = {350--354},
  year      = {2019},
  doi       = {10.1038/s41586-019-1724-z},
  url       = {https://doi.org/10.1038/s41586-019-1724-z},
  timestamp = {Sun, 02 Oct 2022 15:44:36 +0200},
  biburl    = {https://dblp.org/rec/journals/nature/VinyalsBCMDCCPE19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{motors,
  author  = {Book, Gerrit and Traue, Arne and Balakrishna, Praneeth and others},
  title   = {Transferring Online Reinforcement Learning for Electric Motor Control From Simulation to Real-World Experiments},
  journal = {IEEE Open Journal of Power Electronics},
  volume  = {2},
  pages   = {187--201},
  month   = mar,
  year    = {2021},
  doi     = {10.1109/OJPEL.2021.3065877}
}

@article{robotics,
  author   = {Kober, Jens and Bagnell, J. Andrew and Peters, Jan},
  title    = {Reinforcement learning in robotics: A survey},
  journal  = {The International Journal of Robotics Research},
  volume   = {32},
  number   = {11},
  pages    = {1238--1274},
  year     = {2013},
  doi      = {10.1177/0278364913495721},
  abstract = {Reinforcement learning offers to robotics a framework and set of tools for the design of sophisticated and hard-to-engineer behaviors. Conversely, the challenges of robotic problems provide both inspiration, impact, and validation for developments in reinforcement learning. The relationship between disciplines has sufficient promise to be likened to that between physics and mathematics. In this article, we attempt to strengthen the links between the two research communities by providing a survey of work in reinforcement learning for behavior generation in robots. We highlight both key challenges in robot reinforcement learning as well as notable successes. We discuss how contributions tamed the complexity of the domain and study the role of algorithms, representations, and prior knowledge in achieving these successes. As a result, a particular focus of our paper lies on the choice between model-based and model-free as well as between value-function-based and policy-search methods. By analyzing a simple problem in some detail we demonstrate how reinforcement learning approaches may be profitably applied, and we note throughout open questions and the tremendous potential for future research.},
}

@misc{rlhf,
  author        = {Bai, Yuntao and Jones, Andy and Ndousse, Kamal and others},
  title         = {Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback},
  year          = {2022},
  eprint        = {2204.05862},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL}
}

@misc{zejnullahu2022applications,
  author        = {Zejnullahu, Frensi and Moser, Maurice and Osterrieder, Joerg},
  title         = {Applications of Reinforcement Learning in Finance -- Trading with a Double Deep Q-Network},
  year          = {2022},
  eprint        = {2206.14267},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG}
}

@article{CORONATO2020101964,
  author   = {Coronato, Antonio and Naeem, Muddasar and {De Pietro}, Giuseppe and Paragliola, Giovanni},
  title    = {Reinforcement learning for intelligent healthcare applications: A survey},
  journal  = {Artificial Intelligence in Medicine},
  volume   = {109},
  pages    = {101964},
  year     = {2020},
  issn     = {0933-3657},
  doi      = {10.1016/j.artmed.2020.101964},
  url      = {https://www.sciencedirect.com/science/article/pii/S093336572031229X},
  keywords = {Artificial intelligence, Reinforcement learning, Healthcare, Personalized medicine},
  abstract = {Discovering new treatments and personalizing existing ones is one of the major goals of modern clinical research. In the last decade, Artificial Intelligence (AI) has enabled the realization of advanced intelligent systems able to learn about clinical treatments and discover new medical knowledge from the huge amount of data collected. Reinforcement Learning (RL), which is a branch of Machine Learning (ML), has received significant attention in the medical community since it has the potentiality to support the development of personalized treatments in accordance with the more general precision medicine vision. This report presents a review of the role of RL in healthcare by investigating past work, and highlighting any limitations and possible future contributions.}
}

@article{traffic,
  author     = {Lin, Yilun and Dai, Xingyuan and Li, Li and Wang, Fei{-}Yue},
  title      = {An Efficient Deep Reinforcement Learning Model for Urban Traffic Control},
  journal    = {CoRR},
  volume     = {abs/1808.01876},
  year       = {2018},
  url        = {http://arxiv.org/abs/1808.01876},
  eprinttype = {arXiv},
  eprint     = {1808.01876},
  timestamp  = {Mon, 21 Oct 2019 17:15:44 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1808-01876.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{sklearn_api,
  author    = {Buitinck, Lars and Louppe, Gilles and Blondel, Mathieu and others},
  title     = {{API} design for machine learning software: experiences from the scikit-learn project},
  booktitle = {ECML PKDD Workshop: Languages for Data Mining and Machine Learning},
  year      = {2013},
  pages     = {108--122},
}

@inproceedings{10182718,
  author    = {Weber, Daniel and Schenke, Maximilian and Wallscheid, Oliver},
  title     = {Safe Reinforcement Learning-Based Control in Power Electronic Systems},
  booktitle = {2023 International Conference on Future Energy Solutions (FES)},
  year      = {2023},
  pages     = {1--6},
  doi       = {10.1109/FES57669.2023.10182718}
}

@article{deep_q_torque,
  author  = {Schenke, Maximilian and Wallscheid, Oliver},
  title   = {A Deep Q-Learning Direct Torque Controller for Permanent Magnet Synchronous Motors},
  journal = {IEEE Open Journal of the Industrial Electronics Society},
  volume  = {2},
  pages   = {388--400},
  month   = apr,
  year    = {2021},
  doi     = {10.1109/OJIES.2021.3075521}
}